*Computation of MLD by gender

*Tim Goedemé, 27/04/2021


/*

Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
This file can be changed and re-shared for non-commercial use, as long as our original work 
is recognised and the revised work is made available under the same conditions.

When using this do-file, please cite as:
Goedemé, T., Nolan, B., Paskov, M., & Weisstanner, D. (2021). 
Occupational Social Class and Earnings Inequality in Europe: A Comparative Assessment. 
In: Social Indicators Research. DOI: https://doi.org/10.1007/s11205-021-02746-z; https://timgoedeme.com/tools/esec-in-eu-silc/

This file makes use of the decomld command which I wrote for this paper,
but is not available online. Please get in touch if you would like to make use
of this command.

*/


*Globals: input data directory (place1), output directory (place2) and the
*list of country codes looped over below.
*IS and SK dropped from list of countries.

global place1 <<data directory>>
global place2 <<output directory>>
global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK

*Estimations - 9 classes ONLY EU-SILC 2018
*
*Per country: load the 2018 file, derive one subpopulation flag per sex,
*run decomld on earns2 grouped by esec08 for each flag, stack the returned
*matrices and save them as Decomp_<country>_Gender.dta in the output directory.

foreach ctry of global countries {
	di "`ctry'", _continue
	cap mat drop results	
	quietly {
		local year 2018
		// Forward slashes are used as path separators: a backslash placed
		// directly before a macro reference suppresses its expansion in Stata.
		cap use "${place1}/`ctry'/`year'/c`ctry'`year'_addvars2.dta", clear
		local rc = _rc
		if `rc'==0 {
			drop age hydisp eqs hystd thresh60 arop60 actage active
			
			svyset psu1 [pw=weight], strata(strata1)
			
			gen sub1 = sub
			replace sub1 = 0 if sex==1 // sub1 is 1 for males
			gen sub2 = sub
			replace sub2 = 0 if sex==0 // sub2 is 1 for females
			
			*if regexm("DK DE NL MT EE BE CZ NO SE LU CH PT SI CY UK FR IT", "`ctry'")==1 replace sub2 = 0 if esec08==5 
			*if "`ctry'"=="DK" replace sub2 = 0 if (esec08==5 | esec08==4)
			*if "`ctry'"=="RO" replace sub2 = 0 if (esec08==6)
			
			forvalues sub=1/2 {
				forvalues x=2(1)2 {
					noi di "subpop:`sub': earnings:`x';", _continue
					decomld earns`x', na(temp1) group(esec08) subpop(sub`sub')
					local val=rowsof(temp1)
					// Two identifier columns (earnings variable, subpopulation),
					// one row per row of the decomld output
					matrix tempmat = J(`val', 1, `x'), J(`val', 1, `sub')
					matrix colnames tempmat = earnings subpop
					matrix matje = temp1, tempmat
					matrix results = nullmat(results) \ matje
				}
			}
			// noisily, so that the line break after each country is shown
			noi di "."
			
			// Write results to file; only reached when the input file was loaded,
			// so the results matrix exists
			clear
			svmat results, names(col)
			gen country="`ctry'"
			
			save "${place2}\Decomp_`ctry'_Gender.dta", replace // esec08
		}
		else {
			noi di as error " input file could not be loaded (rc=`rc') - country skipped, no output saved"
		}
	}
}

*Putting estimations together:
*stack the per-country gender files into a single dataset

clear
foreach ctry of global countries {
	append using "${place2}\Decomp_`ctry'_Gender.dta"
}

*Order the variables before saving, so that the stored file has this column order
order country EST SE
save "${place2}\Decomp_Esec9_2018_Gender.dta", replace

**********************************************************************************************
*Estimations limited to classes and samples with full information (cf. 34_Counterfactual MLD)*
**********************************************************************************************
*A. Females
************
***==> Drop Denmark from list:
*NOTE(review): UK is also absent from this list, although the class filters below
*and the append loop that follows this section both refer to UK - confirm
*whether the UK file is produced elsewhere.
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI 

*NOTE(review): the permanent option changes the stored Stata setting for later
*sessions as well - confirm this is wanted.
set matsize 5000, permanent

*Per country: restrict the subpopulation to sex==1 with complete, positive
*earnings, drop small classes, run decomld on earns1 and earns2 grouped by
*class, and save the stacked results as Decomp_<country>_Gender_nonmissing.dta.
foreach ctry of global countries {
	di "`ctry'", _continue
	cap mat drop results	
	quietly {
		local year 2018
		// Forward slashes are used as path separators: a backslash placed
		// directly before a macro reference suppresses its expansion in Stata.
		cap use "${place1}/`ctry'/`year'/c`ctry'`year'_addvars2.dta", clear
		local rc = _rc
		if `rc'==0 {
			drop age hydisp eqs hystd thresh60 arop60 actage active
			svyset psu1 [pw=weight], strata(strata1)
			
			// Exclude observations flagged by missers2 or with non-positive earnings
			replace sub = 0 if missers2!=0
			replace sub = 0 if earns1<=0 | earns2<=0
			
			// Keep sex==1 only
			replace sub = 0 if sex==0
			
			gen class=esec08
			
			// Drop classes that account for less than 1.5% of weighted population
			// or with fewer than 30 observations
			if "`ctry'"=="NO" {
				replace class = . if (esec08==4)
				replace sub = 0 if (esec08==4)
			}
			
			// class 5 accounts for less than 1.5% of population in paid employment
			// everywhere except in the countries listed
			if regexm("AT EL IT PL RO RS", "`ctry'")!=1 {
				replace class = . if esec08==5
				replace sub = 0 if esec08==5
			}
			
			if "`ctry'"=="RO" {
				replace class = . if (esec08==6)
				replace sub = 0 if (esec08==6)
			}
			
			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 {
				replace class=. if esec08==8
				replace sub=0 if esec08==8
			}
			
			drop esec08
			
			forvalues x=1/2 {
				noi di "earnings:`x';", _continue
				// The subpopulation variable is sub; no local named sub is defined here
				decomld earns`x', na(temp1) group(class) subpop(sub)
				local val=rowsof(temp1)
				// Two identifier columns (earnings variable, subpop code 1),
				// one row per row of the decomld output
				matrix tempmat = J(`val', 1, `x'), J(`val', 1, 1)
				matrix colnames tempmat = earnings subpop
				matrix matje = temp1, tempmat
				matrix results = nullmat(results) \ matje
			}
			
			noi di "."
			
			// Write results to file; only reached when the input file was loaded,
			// so the results matrix exists
			clear
			svmat results, names(col)
			gen country="`ctry'"
			
			save "${place2}\Decomp_`ctry'_Gender_nonmissing.dta", replace // esec08
		}
		else {
			noi di as error " input file could not be loaded (rc=`rc') - country skipped, no output saved"
		}
	}
}

*Putting estimations together:
*stack the per-country files of the females run into a single dataset
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK
clear
foreach ctry of global countries {
	append using "${place2}\Decomp_`ctry'_Gender_nonmissing.dta"
}

*Order the variables before saving, so that the stored file has this column order
order country EST SE
save "${place2}\Decomp_Esec9_2018_Females_nonmissing.dta", replace



*B. Males
************
***==> Drop Denmark from list:
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IE IT LT LU LV MT NL NO PL PT RO RS SE SI UK

*NOTE(review): the permanent option changes the stored Stata setting for later
*sessions as well - confirm this is wanted.
set matsize 5000, permanent

*Per country: restrict the subpopulation to sex==0 with complete, positive
*earnings, drop small classes, run decomld on earns2 grouped by class, and
*save the stacked results as Decomp_<country>_Males_nonmissing.dta.
foreach ctry of global countries {
	di "`ctry'", _continue
	cap mat drop results	
	quietly {
		local year 2018
		// Forward slashes are used as path separators: a backslash placed
		// directly before a macro reference suppresses its expansion in Stata.
		cap use "${place1}/`ctry'/`year'/c`ctry'`year'_addvars2.dta", clear
		local rc = _rc
		if `rc'==0 {
			drop age hydisp eqs hystd thresh60 arop60 actage active
			svyset psu1 [pw=weight], strata(strata1)
			
			// Exclude observations flagged by missers2 or with non-positive earnings
			replace sub = 0 if missers2!=0
			replace sub = 0 if earns1<=0 | earns2<=0
			
			// Keep sex==0 only
			replace sub = 0 if sex==1
			
			gen class=esec08
			
			// Drop classes that account for less than 1.5% of weighted population
			// or with fewer than 30 observations
			if "`ctry'"=="NO" {
				replace class = . if (esec08==4)
				replace sub = 0 if (esec08==4)
			}
			
			// class 5 accounts for less than 1.5% of population in paid employment
			// in the countries listed
			if regexm("BE CH CZ DE DK EE LU MT NL NO PT SE", "`ctry'")==1 {
				replace class = . if esec08==5
				replace sub = 0 if esec08==5
			}
			
			if regexm("BE CY LU MT NL NO SE UK", "`ctry'")==1 {
				replace class=. if esec08==8
				replace sub=0 if esec08==8
			}
			
			drop esec08
			
			// Only earns2 is estimated in this section
			forvalues x=2/2 {
				noi di "earnings:`x';", _continue
				// The subpopulation variable is sub; no local named sub is defined here
				decomld earns`x', na(temp1) group(class) subpop(sub)
				local val=rowsof(temp1)
				// Two identifier columns (earnings variable, subpop code 0),
				// one row per row of the decomld output
				matrix tempmat = J(`val', 1, `x'), J(`val', 1, 0)
				matrix colnames tempmat = earnings subpop
				matrix matje = temp1, tempmat
				matrix results = nullmat(results) \ matje
			}
			
			noi di "."
			
			// Write results to file; only reached when the input file was loaded,
			// so the results matrix exists
			clear
			svmat results, names(col)
			gen country="`ctry'"
			
			save "${place2}\Decomp_`ctry'_Males_nonmissing.dta", replace // esec08
		}
		else {
			noi di as error " input file could not be loaded (rc=`rc') - country skipped, no output saved"
		}
	}
}

*Putting estimations together:
*stack the per-country files of the males run into a single dataset

***==> Ireland dropped, as estimation went wrong, done separately on data file, not sure what problem was
global countries AT BE BG CH CY CZ DE EE EL ES FI FR HR HU IT LT LU LV MT NL NO PL PT RO RS SE SI UK
clear
foreach ctry of global countries {
	append using "${place2}\Decomp_`ctry'_Males_nonmissing.dta"
}

*Order the variables before saving, so that the stored file has this column order
order country EST SE
save "${place2}\Decomp_Esec9_2018_Males_nonmissing.dta", replace
